% Sequential
%
% Benchmark tables for square problems, one row per measurement. The first
% three columns repeat the dimension, the fourth is 0 for MKL and 1..3 for
% the Strassen variants (presumably the number of recursive steps), and the
% last column is the measurement handed to perf() (presumably a time in
% milliseconds -- TODO confirm against perf.m, which is not part of this file).

MKL_0 = [
    2000 2000 2000 0 688.078
    2500 2500 2500 0 1345.58
    3000 3000 3000 0 2271.38
    3500 3500 3500 0 3653.2
    4000 4000 4000 0 5360.56
    4500 4500 4500 0 7755.14
    5000 5000 5000 0 10467.9
    5500 5500 5500 0 14034.4
    6000 6000 6000 0 18074.9
    6500 6500 6500 0 23077.5
    7000 7000 7000 0 28412.4
    7500 7500 7500 0 35471.9
];

STRASSEN_1 = [
    2000 2000 2000 1 667.291
    2500 2500 2500 1 1294.84
    3000 3000 3000 1 2167.26
    3500 3500 3500 1 3432.32
    4000 4000 4000 1 4959.95
    4500 4500 4500 1 7345.13
    5000 5000 5000 1 9830.11
    5500 5500 5500 1 13104.8
    6000 6000 6000 1 16521.5
    6500 6500 6500 1 21425.8
    7000 7000 7000 1 26377.8
    7500 7500 7500 1 32499.3
];
STRASSEN_2 = [
    2000 2000 2000 2 661.54
    2500 2500 2500 2 1251.32
    3000 3000 3000 2 2138.93
    3500 3500 3500 2 3485.46
    4000 4000 4000 2 4836.33
    4500 4500 4500 2 7094.94
    5000 5000 5000 2 9403.85
    5500 5500 5500 2 12559.1
    6000 6000 6000 2 15789.5
    6500 6500 6500 2 20251.9
    7000 7000 7000 2 24848.1
    7500 7500 7500 2 30967.1
];
STRASSEN_3 = [
    2000 2000 2000 3 672.179
    2500 2500 2500 3 1288.86
    3000 3000 3000 3 2296.55
    3500 3500 3500 3 3592.95
    4000 4000 4000 3 4887.59
    4500 4500 4500 3 7136.78
    5000 5000 5000 3 9181.16
    5500 5500 5500 3 12565.4
    6000 6000 6000 3 15561.1
    6500 6500 6500 3 20792.9
    7000 7000 7000 3 25249.2
    7500 7500 7500 3 31584.1
];

% Reference numbers for D'Alberto's implementation: [dimension, rate].
% The rate is divided by 1000 below before plotting on the GFLOPS axis
% (presumably it is recorded in MFLOPS -- TODO confirm).
DALBERTO_PIPED = [
    2000, 2.399241e+04
    2500, 2.596302e+04
    3000, 2.674492e+04
    3500, 2.692750e+04
    4000, 2.765301e+04
    4500, 2.783400e+04
    5000, 2.903991e+04
    5500, 3.000490e+04
    6000, 2.999555e+04
    6500, 3.004978e+04
    7000, 3.063524e+04
    7500, 3.042201e+04
];

% x-axis values come from the first column of the MKL table.
dims = MKL_0(:, 1);

MKL_PERF = perf(MKL_0);

% Element-wise best of the three Strassen variants at every dimension.
OUR_PERF = max(perf(STRASSEN_1), perf(STRASSEN_2));
OUR_PERF = max(OUR_PERF, perf(STRASSEN_3));

DALBERTO_PERF = DALBERTO_PIPED(:, 2) / 1000;

% Shared line styling for every series in the figure.
lw = 1.75;      % LineWidth
msz = 8;       % MarkerSize

make_plot_pre(8, 6);
% A fourth series, perf(ST_YES_CSE) drawn as 'mo--', used to be plotted here
% and is currently disabled.
plot(dims, MKL_PERF, 'g+-', ...
     dims, OUR_PERF, 'b+--', ...
     dims, DALBERTO_PERF, 'ro-', ...
     'LineWidth', lw, 'MarkerSize', msz);
legend('MKL', ...
       'Code gen. Strassen', ...
       ['D''', 'Alberto Strassen-Winograd']);
title('Sequential performance on $\langle N, N, N \rangle$', 'interpreter', 'latex');
xlabel('dimension (N)');
ylabel('Effective performance (GFLOPS)');
make_plot_post(8, 6);
 
 
 
 % Parallel
% Parallel benchmark tables for square problems, one row per measurement.
% Same layout as the sequential tables: the first three columns repeat the
% dimension, the fourth is 0 for MKL and 1..2 for the Strassen variants
% (presumably the number of recursive steps), and the last column is the
% measurement handed to perf() (presumably a time in milliseconds -- the
% CAPS timings below are multiplied by 1000 to match; TODO confirm against
% perf.m, which is not part of this file).

% NOTE(review): the 9500 entry (7670.43) is far above both neighbours and
% looks like a noisy measurement; kept as recorded -- consider re-running.
MKL_0 = [
     8000  8000  8000 0 2441.67
     8500  8500  8500 0 2748.94
     9000  9000  9000 0 3456.58
     9500  9500  9500 0 7670.43
    10000 10000 10000 0 4464.37
    10500 10500 10500 0 5298.56
    11000 11000 11000 0 6075.99
    11500 11500 11500 0 6884.07
    12000 12000 12000 0 7866.44
    12500 12500 12500 0 8628.21
];

% NOTE(review): the 11500 entry (12370.1) is larger than the 12000 and 12500
% entries and looks like a noisy measurement; kept as recorded. Because the
% plot takes the best variant per dimension, STRASSEN_2 covers that point.
STRASSEN_1 = [
     8000  8000  8000 1 2413.07
     8500  8500  8500 1 2830.92
     9000  9000  9000 1 3355.96
     9500  9500  9500 1 3963.84
    10000 10000 10000 1 4473.23
    10500 10500 10500 1 5169.91
    11000 11000 11000 1 5885
    11500 11500 11500 1 12370.1
    12000 12000 12000 1 9328.05
    12500 12500 12500 1 9219.69
];
STRASSEN_2 = [
     8000  8000  8000 2 2521.75
     8500  8500  8500 2 3121.97
     9000  9000  9000 2 3629.63
     9500  9500  9500 2 4265.13
    10000 10000 10000 2 4792.73
    10500 10500 10500 2 5607.35
    11000 11000 11000 2 6349.8
    11500 11500 11500 2 7267.75
    12000 12000 12000 2 8012
    12500 12500 12500 2 9146.42
];
% No separate three-step data is recorded for the parallel runs; aliasing to
% the two-step table keeps the max() below structurally identical to the
% sequential section without affecting its result.
STRASSEN_3 = STRASSEN_2;

% CAPS reference timings, measured on their own set of dimensions. The table
% is assembled in the same five-column layout as the others so that perf()
% can be applied to it; the timings are scaled by 1000 to match the other
% tables' last column.
CAPS_dims = [7168, 8960, 10752, 14336, 17920]';
CAPS_times = [2.067370, 3.787081, 6.078716, 13.900579, 40.524520 ]' * 1000;
CAPS = [CAPS_dims, CAPS_dims, CAPS_dims, 2 * ones(size(CAPS_dims)), CAPS_times];

% x-axis values for the MKL / Strassen series.
dims = MKL_0(:, 1);

% All series are reported per core; num_cores is the single place to change
% if the measurements are redone on a different machine.
num_cores = 24;
MKL_PERF = perf(MKL_0) / num_cores;
% Element-wise best of the Strassen variants at every dimension.
OUR_PERF = max(max(perf(STRASSEN_1), perf(STRASSEN_2)), perf(STRASSEN_3)) / num_cores;
CAPS_PERF = perf(CAPS) / num_cores;

lw = 1.75;      % LineWidth
msz = 8;       % MarkerSize
make_plot_pre(8, 6);
% A fourth series, perf(ST_YES_CSE) drawn as 'mo--', used to be plotted here
% and is currently disabled.
plot(dims, MKL_PERF, 'g+-', ...
     dims, OUR_PERF, 'b+--', ...
     CAPS_dims, CAPS_PERF, 'ro-', ...
     'LineWidth', lw, 'MarkerSize', msz);
% Exactly one label per plotted series. The D'Alberto label from the
% sequential figure is intentionally absent: no such series is drawn here,
% and an extra label only triggers an "Ignoring extra legend entries" warning.
legend('MKL', 'Code gen. Strassen', 'CAPS');
title('Parallel performance on $\langle N, N, N \rangle$', 'interpreter', 'latex');
xlabel('dimension (N)'); ylabel('Effective performance (GFLOPS / core)');
make_plot_post(8, 6);
